from rank_bm25 import BM25Okapi

def get_top_n_sim_text(query, documents, top_n=5):
    """Return the ``top_n`` documents most similar to ``query`` under BM25.

    The query and every document are tokenised character by character, so
    no word segmenter is required (useful for Chinese text).

    Args:
        query: The text to search for.
        documents: Iterable of candidate strings to rank against ``query``.
        top_n: Maximum number of matches to return.

    Returns:
        The matches selected by ``BM25Okapi.get_top_n``, each as a list of
        single-character tokens (``"".join(item)`` restores the string).
        An empty list when there are no documents or ``top_n`` is not
        positive.
    """
    if top_n <= 0:
        return []

    # Character-level tokens: each document becomes a list of its characters.
    corpus = [list(doc) for doc in documents]

    # BM25Okapi divides by the corpus size while computing the average
    # document length, so an empty corpus must be rejected before building it.
    if not corpus:
        return []

    bm25ok = BM25Okapi(corpus)
    # Pass the caller's top_n through instead of a hard-coded 1.
    return bm25ok.get_top_n(list(query), corpus, top_n)


# Demo: look up the stored sentence closest to the prompt.
prompt_text = "明天是什么天气"
context_list = [
    "哪个颜色好看",
    "今天晚上吃什么",
    "你家电话多少",
    "明天的天气是晴天",
    "晚上的月亮好美呀",
]
result = get_top_n_sim_text(prompt_text, context_list, 1)
# Each match comes back as a list of characters; join them into strings.
rs = list(map("".join, result))
print(rs)
